/**Aaron Flaaen */
/**May 29, 2013 */
/**This File Preps the LFTTD Data and Matches to CMF-Products Trailer File, vis NAICS_Baseroot */

libname fromlftd  "";
libname outloc "";
libname fromcmf "";

/** *******************************************************************************/
/** 			SUMMARY OF THIS FILE					 */
/**										*/
/** FILES THAT MUST BE RUN FIRST -- NONE 				       */
/** Step 1. Prep HS to NAICS-BASEROOT Concordance Files			      */
/** Step 1.1 Read in Concordance Data for CMF-Product Line Trailer  	     */
/** Step 1.2 Read in Concordance Data for LFTTD 			    */
/** Step 2. Prep LFTTD IMPORT Data and Merge Concordance		   */
/** 	2.1 Merge Together IMP and CONCORDANCE and Collapse 		  */
/** 	2.2 Create Extracts for Japan and Not Japan and by Related Not Related */ 
/**	2.3 Merge Back Together 					*/
/** Step 3. Prep LFTTD EXPORT Data and Merge Concordance	       */
/**	3.1 Merge Together IMP and CONCORDANCE and Collapse 	      */
/**	3.2 Create Extracts for Japan and Not Japan and NA and by Related Not Related */
/**	3.3 Merge Back Together 				    */
/** Step 4. Prep CMF Product Trailer File and Merge Concordance	   */
/**	4.1 Delete Those Codes for Balancing 			  */
/**	4.2 Try to Match Codes on 6-digits (allocate) for those that didn't match */
/** Step 5. Export to Stata					 */
/**								*/
/** OUTPUT imp_naics_firm_all.dta, exp_naics_firm_all (for 2007)   */
/**        cmf_prod_naics_all.dta   			       */
/** ***********************************************************/
	
/** *******************************************************************************/
/** Step 1. Prep HS to NAICS-BASEROOT Concordance Files			 	 */
/** *****************************************************************************/

/** Step 1.1 Read in Concordance Data for CMF-Product Line Trailer */
/*
data pd07;
	infile '/pd07.csv' ;
	input baseroot $ 2-7 pc7 $ 11-17 naicsbase07 $ 21-28;
run; */

proc import out = pd07
	FILE="pd07_2.csv"
	dbms= CSV replace;
run;

data pd07;
	set pd07;
	naicsprod8 = naicsbase07;
run;

proc sort data=pd07;
	by pc7;
run;



/** *******************************************************************************/
/** Step 2. Prep CMF Product Trailer File and Merge Concordance for 2007 	 */
/** *****************************************************************************/

data cmfprod;
	set fromcmf.cmf2007prod (keep= naicspc naicspc_col pv survu_id usable_prod);
	if naicspc = "" then naicspc=naicspc_col;
	if naicspc="" and naicspc_col="" then delete;
	testneg2 = substr(naicspc,9,2);
run;

data cmf;
	set fromcmf.cmf2007 (keep = survu_id tvs);
run;

proc sort data=cmf;
	by survu_id;
run;

proc sort data=cmfprod;
	by survu_id;
run;


data cmfprod;
	merge cmfprod (in=data1) cmf (in=data2);
	by survu_id;
	if data1 and not data2 then m_var=1;
	if not data1 and data2 then m_var=2;
	if data1 and data2 then m_var=3;
run;


/* 4.1 Delete Those Codes for Balancing */
data cmfprod;
	set cmfprod (drop = m_var);
	if pv<0 then delete;
	if usable_prod="N" then delete;
	if usable_prod="D" then delete;
	if usable_prod="I" then delete;
	/*if testneg2="WW" then delete; */
run;

data cmfprod;
	set cmfprod;
	pc7 = substr(naicspc,1,7);
	if pv=0 then delete;
	if pc7 = "xxxxxxx" then delete;
run;

proc sort data=cmfprod;
	by pc7;
run;

data cmf_prod_naics;
	merge cmfprod (in=data1) pd07 (in=data2);
	by pc7;
	if data1 and not data2 then m_var=1;
	if not data1 and data2 then m_var=2;
	if data1 and data2 then m_var=3;
run;

proc freq data=cmf_prod_naics;
	tables m_var;
run;

/* 4.2 Try to Match Codes on 6-digits (allocate) for those that didn't match */
data cmf_prod_naics_unmatched;
	set cmf_prod_naics;
	if m_var = 1;
	if substr(pc7,7,1)="W";
	pc6 = substr(pc7,1,6);
run;

proc sort data=cmf_prod_naics_unmatched (drop=pc7);
	by survu_id pc6;
run;

proc means data=cmf_prod_naics_unmatched noprint;
	by survu_id pc6;
	var pv;
	output out=cmf_prod_naics_unmatched_2 sum(pv)=pv6;
run;

proc sort data=cmf_prod_naics_unmatched_2 (drop=_type_ _freq_);
	by pc6;
run;

/*calculate ratios of 7 to 6 for matched data */
data cmf_prod_naics_matched;
 	set cmf_prod_naics;
 	if m_var=3;
	pc6 = substr(pc7,1,6);
run;

proc sort data=cmf_prod_naics_matched (drop = m_var);
	by pc7;
run;

proc means data=cmf_prod_naics_matched noprint;	
	by pc7;
	var pv;
	output out=sum07 sum(pv)=pvsum7;
run;

proc sort data=sum07 (drop = _type_ _freq_);
	by pc7;
run;

proc sort data=cmf_prod_naics_matched;
	by pc6;
run;

proc means data=cmf_prod_naics_matched noprint;	
	by pc6;
	var pv;
	output out=sum06 sum(pv)=pvsum6;
run;

proc sort data=sum06 (drop = _type_ _freq_);
	by pc6;
run;

data cmf_prod_naics_matched_1;
	merge cmf_prod_naics_matched (in=data1) sum07 (in=data2) ;
	by pc7;
run;

data cmf_prod_naics_matched_2;
	merge cmf_prod_naics_matched_1 (in=data1) sum06 (in=data2) ;
	by pc6;
	ratio = pvsum7/pvsum6;
run;

proc sort data=cmf_prod_naics_matched_2 (keep=pc7 pc6 ratio baseroot) nodupkey;
	by pc7;
run;

proc sort data=cmf_prod_naics_matched_2;
	by pc6 pc7;
run;


/* Apply Ratios to unmatched data */
proc sql noprint;
	create table cmf_prod_naics_unmatched_3 as
	select *
	from cmf_prod_naics_unmatched_2 as l, cmf_prod_naics_matched_2 as r
	where l.pc6=r.pc6;
quit;

data cmf_prod_naics_unmatched_3;
	set cmf_prod_naics_unmatched_3;
	pv =pv6*ratio;
run;

proc sort data=cmf_prod_naics_unmatched_3 (drop=pc6 pv6);
	by survu_id pc7 baseroot;
run;

proc means data=cmf_prod_naics_unmatched_3 noprint;
	by survu_id pc7 baseroot;
	var pv;
	output out = cmf_prod_naics_unmatched_4 sum(pv)=pv;
run;


/*New Data */
data cmf_prod_naics_clean;
	set cmf_prod_naics_matched cmf_prod_naics_unmatched_4;
run;

proc sort data=cmf_prod_naics_clean;
	by survu_id baseroot;
run;

proc means data=cmf_prod_naics_clean noprint;
	var pv;
	by survu_id baseroot;
	output out=cmf_prod_naics_all sum(pv) = prod;
run;


data cmf_prod_naics_2007;
	set cmf_prod_naics_all (drop=_type_ _freq_);
run;

/* Clean up from this step */
proc datasets;
	delete cmf_prod_naics_unmatched cmf_prod_naics_unmatched_2 cmf_prod_naics_unmatched_3 
	cmf_prod_naics_unmatched_4 cmf_prod_naics_matched cmf_prod_naics_matched_1 cmf_prod_naics_matched_2 
	cmf_prod_naics_clean sum06 sum07;
run;


/** *******************************************************************************/
/** Step 5. Export to Stata							 */
/** *****************************************************************************/



proc export data = cmf_prod_naics_2007
	outfile = "cmf_prod_naics_2007.dta"
	dbms = stata replace;
run;



! chmod 770 *


